library(tidyverse)
library(DESeq2)
library(biomaRt)
## Warning: replacing previous import 'utils::findMatches' by
## 'S4Vectors::findMatches' when loading 'AnnotationDbi'
library(clusterProfiler)
library(org.Hs.eg.db)
library(factoextra)
library(vegan)
library(ggrepel)
library(ggsci)
library(clusterProfiler)
library(enrichplot)
library(DT)

Importing Data

setwd("/storage2/Elena/")


table2 <- read_tsv("/storage2/Elena/rnaseqRenombrado/nextflow/star_salmon/salmon.merged.transcript_counts.tsv")

meta <- colnames(table2) %>% 
  as_tibble() %>% 
  filter(value != "tx") %>% 
  filter(value != "gene_id") %>% 
  rename(sample = value) %>% 
  separate(sample,c("c1","c2"), remove = F, sep = "\\.") %>% 
  unite("grupo",c1:c2, sep = "_") %>% 
  mutate(grupo = str_replace(grupo,"_\\d+$","")) %>% 
  as_data_frame() %>% 
  arrange(sample) %>% 
  column_to_rownames("sample")
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` (with slightly different semantics) to convert to a
##   tibble, or `as.data.frame()` to convert to a data frame.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Importing annotation

We import biological annotation from ENSEMBL

mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL",
                dataset = "hsapiens_gene_ensembl",
                host = 'https://www.ensembl.org')

ttg <- biomaRt::getBM(
  attributes = c("ensembl_transcript_id","ensembl_gene_id","external_gene_name", "description",
                 "entrezgene_id","gene_biotype","transcript_biotype"),
  mart = mart)

Setting data to DE analysis and QC

table_sum <- table2 %>% 
  pivot_longer(names_to = "Sample", values_to = "counts", -(tx:gene_id)) %>% 
  rename(ensembl_transcript_id = tx) %>% 
  inner_join(ttg) %>% 
  filter(transcript_biotype == "protein_coding") %>% 
  group_by(ensembl_gene_id,Sample,"external_gene_name","description",
                 "entrezgene_id","gene_biotype") %>%
  summarise(counts = sum(counts)) %>% 
  ungroup() %>% 
  group_by(ensembl_gene_id) %>% 
  mutate(TotalZeros = sum(counts ==0)) %>% 
  mutate(TotalCounts = sum(counts)) %>% 
  ungroup() %>% 
  filter(TotalZeros < 8) %>% 
  filter(TotalCounts > 50) %>% 
  dplyr::select(ensembl_gene_id,Sample,counts) %>% 
  distinct() %>% 
  pivot_wider(names_from = Sample, values_from = counts)%>% 
  column_to_rownames("ensembl_gene_id") %>% 
  round()
## Joining with `by = join_by(ensembl_transcript_id)`
## Warning in inner_join(., ttg): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 283701 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
## `summarise()` has grouped output by 'ensembl_gene_id', 'Sample',
## '"external_gene_name"', '"description"', '"entrezgene_id"'. You can override
## using the `.groups` argument.
dds_gene <- DESeqDataSetFromMatrix(countData = table_sum %>% as.matrix(), 
                                   colData = meta, 
                                   design = ~grupo
                                   )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
dds_gene <- DESeq(dds_gene,sfType = "poscounts", fitType = "mean")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing

Quality Control

table2 %>% pivot_longer(names_to = "Sample", values_to = "counts", -(tx:gene_id)) %>% 
  group_by(Sample) %>% 
  summarise(TotalCounts = sum(counts)) %>% 
  full_join(meta %>% as_tibble(rownames = "Sample")) %>% 
  ggplot(aes(x= Sample, y = TotalCounts, fill = grupo)) + geom_col() + coord_flip() + theme_light()+ labs(title = "Raw Data")
## Joining with `by = join_by(Sample)`

counts(dds_gene, normalized = T) %>% as_tibble(rownames = "ensembl_gene_id") %>% pivot_longer(names_to = "Sample", values_to = "counts", -ensembl_gene_id) %>% 
  group_by(Sample) %>% 
  summarise(TotalCounts = sum(counts)) %>% 
  full_join(meta %>% as_tibble(rownames = "Sample")) %>% 
  ggplot(aes(x= Sample, y = TotalCounts, fill = grupo)) + geom_col() + coord_flip() + theme_light() + labs(title = "Normalize Data")
## Joining with `by = join_by(Sample)`

counts(dds_gene, normalized = T) %>% 
  as_tibble(rownames = "ensembl_gene_id") %>% 
  pivot_longer(names_to = "Sample", values_to = "counts", -ensembl_gene_id) %>% 
  full_join(meta %>% as_tibble(rownames = "Sample")) %>% 
  inner_join(ttg) %>% 
  group_by(Sample,gene_biotype) %>% 
  summarise(counts = sum(counts)) %>% 
  ggplot(aes(x = Sample, y = counts, fill = gene_biotype)) + 
  geom_col(position = "dodge") + 
  scale_y_log10() + coord_flip()
## Joining with `by = join_by(Sample)`
## Joining with `by = join_by(ensembl_gene_id)`
## Warning in inner_join(., ttg): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 73971 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
## `summarise()` has grouped output by 'Sample'. You can override using the
## `.groups` argument.

To check the quality of the sample we test the homogenicity of the groups. In this case we represent the samples using Partial Least Squares Discriminant Analysis (PLS-DA)

library(mixOmics)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:biomaRt':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loaded mixOmics 6.24.0
## Thank you for using mixOmics!
## Tutorials: http://mixomics.org
## Bookdown vignette: https://mixomicsteam.github.io/Bookdown
## Questions, issues: Follow the prompts at http://mixomics.org/contact-us
## Cite us:  citation('mixOmics')
## 
## Attaching package: 'mixOmics'
## The following object is masked from 'package:purrr':
## 
##     map
X = counts(dds_gene, normalized = T) %>% t()
Y = counts(dds_gene, normalized = T) %>% 
  t() %>%
  as_tibble(rownames = "Sample") %>%  
  inner_join(meta %>% 
               rownames_to_column("Sample")) %>% 
  mutate(grupo = as.factor(grupo)) %>% 
  pull(grupo)
## Joining with `by = join_by(Sample)`
pl <- splsda(X,Y, ncomp = 10, scale = T)
plotIndiv(pl, ind.names = Y, ellipse = TRUE, legend =TRUE)

To test the homogenicity we perform a PERMANOVA test

dist <- counts(dds_gene, normalized = T) %>% 
  t() %>% vegdist(method = "binomial")

adonis2(dist ~ grupo, data = meta, permutations = 10000)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = dist ~ grupo, data = meta, permutations = 10000)
##          Df SumOfSqs      R2      F Pr(>F)   
## grupo     3   729396 0.61428 4.2467 0.0013 **
## Residual  8   458010 0.38572                 
## Total    11  1187406 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The homogenicity of the groups is statistically significant so the sample quality is good enough

Statistical Test

All the test were performed at gene level and including lncRNA. You can find all the table results at https://github.com/irycisBioinfo/rnaSeq_elena

For each comparison we have performed an Gene Set Enrichment Analysis of GO term and KEGG pathways.

C_4H vs Q2_4H

C_4H_vs_Q2_4H <- results(dds_gene, contrast = c("grupo","C_4H","Q2_4H")) %>% 
  as_tibble(rownames = "ensembl_gene_id") %>%
  inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
C_4H_vs_Q2_4H %>%  
  write_tsv("C_4H_vs_Q2_4H.tsv")


C_4H_vs_Q2_4H %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= log2FoldChange, 
             y = -log10(padj), 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  labs(title = "C_4H vs Q2_4H") 
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13995 rows containing missing values (`geom_text_repel()`).

C_4H_vs_Q2_4H %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= baseMean, 
             y = log2FoldChange, 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  scale_x_log10() +
  labs(title = "C_4H vs Q2_4H") 
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Removed 13995 rows containing missing values (`geom_text_repel()`).

### Funtional Annotation

C_4H_vs_Q2_4H_enrich <- C_4H_vs_Q2_4H %>%  
  filter(padj < 0.001) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_4H_vs_Q2_4H_enrich_go <- enrichGO(gene =C_4H_vs_Q2_4H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_4H_vs_Q2_4H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_4H_vs_Q2_4H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}

C_4H_vs_Q2_4H_enrich_kegg<- enrichKEGG(gene = C_4H_vs_Q2_4H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## --> No gene can be mapped....
## --> Expected input gene ID: 51703,5232,3795,57016,10327,5161
## --> return NULL...
if ((C_4H_vs_Q2_4H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_4H_vs_Q2_4H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}
## [1] "No enriched Pathways"

C_8H vs Q2_8H

C_8H_vs_Q2_8H <- results(dds_gene, contrast = c("grupo","C_8H","Q2_8H")) %>% 
  as_tibble(rownames = "ensembl_gene_id") %>%
  inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
C_8H_vs_Q2_8H %>%  
  write_tsv("C_8H_vs_Q2_8H.tsv")

C_8H_vs_Q2_8H %>% 
  mutate(sig = ifelse((padj < 0.001 & abs(log2FoldChange) >1.5) ,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= log2FoldChange, 
             y = -log10(padj), 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  labs(title = "C_8H vs Q2_8H") 
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13516 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 428 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

C_8H_vs_Q2_8H %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= baseMean, 
             y = log2FoldChange, 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  scale_x_log10()+
  labs(title = "C_8H_vs_Q2_8H") 
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 9341 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 4592 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

### Funtional Annotation

C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%  
  filter(padj < 0.001 & abs(log2FoldChange) >1.5) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}

C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}

#### padj <0.01, abs(foldchange>1.5)

C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%  
  filter(padj < 0.01 & abs(log2FoldChange) >1.5) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}

C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}
## [1] "No enriched Pathways"

padj <0.01, foldchange>1.5

C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%  
  filter(padj < 0.01 & log2FoldChange >1.5) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}
## [1] "No enriched Pathways"

padj <0.01, foldchange<1.5

C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%  
  filter(padj < 0.01 & log2FoldChange <1.5) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}

C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}

padj <0.01, abs(foldchange>2)

C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%  
  filter(padj < 0.01 & abs(log2FoldChange) >2) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}

C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}
## [1] "No enriched Pathways"

padj <0.01, foldchange>2

C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%  
  filter(padj < 0.01 & log2FoldChange >2) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}

#### padj <0.01, foldchange<2

C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%  
  filter(padj < 0.01 & log2FoldChange <2) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}

C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}

C_4H vs C_8H

C_4H_vs_C_8H <- results(dds_gene, contrast = c("grupo","C_4H","C_8H")) %>% 
  as_tibble(rownames = "ensembl_gene_id") %>%
  inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
C_4H_vs_C_8H %>%  
  write_tsv("C_4H_vs_C_8H.tsv")

C_4H_vs_C_8H %>% 
  mutate(sig = ifelse((padj < 0.001 & abs(log2FoldChange) >1.5) ,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= log2FoldChange, 
             y = -log10(padj), 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  labs(title = "C_4H_vs_C_8H") + 
  facet_wrap(~gene_biotype, scales = "free")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13995 rows containing missing values (`geom_text_repel()`).

C_4H_vs_C_8H %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= baseMean, 
             y = log2FoldChange, 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  scale_x_log10()+
  labs(title = "C_4H_vs_C_8H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13896 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 79 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

### Funtional Annotation

C_4H_vs_C_8H_enrich <- C_4H_vs_C_8H %>%  
  filter(padj < 0.001 & abs(log2FoldChange) >1.5) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_4H_vs_C_8H_enrich_go <- enrichGO(gene =C_4H_vs_C_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_4H_vs_C_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_4H_vs_C_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_4H_vs_C_8H_enrich_kegg<- enrichKEGG(gene = C_4H_vs_C_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((C_4H_vs_C_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_4H_vs_C_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}

Q2_4H vs Q2_8H

Q2_4H_vs_Q2_8H <- results(dds_gene, contrast = c("grupo","Q2_4H","Q2_8H")) %>% 
  as_tibble(rownames = "ensembl_gene_id") %>%
  inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
Q2_4H_vs_Q2_8H %>%  
  write_tsv("Q2_4H_vs_Q2_8H.tsv")

Q2_4H_vs_Q2_8H %>% 
  mutate(sig = ifelse((padj < 0.001 & abs(log2FoldChange) >1.5) ,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= log2FoldChange, 
             y = -log10(padj), 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  labs(title = "Q2_4H_vs_Q2_8H") + 
  facet_wrap(~gene_biotype, scales = "free")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13881 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 68 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Q2_4H_vs_Q2_8H %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= baseMean, 
             y = log2FoldChange, 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  labs(title = "Q2_4H_vs_Q2_8H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 11121 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 2864 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

### Funtional Annotation

Q2_4H_vs_Q2_8H_enrich <- Q2_4H_vs_Q2_8H %>%  
  filter(padj < 0.001 & abs(log2FoldChange) >1.5) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

Q2_4H_vs_Q2_8H_enrich_go <- enrichGO(gene =Q2_4H_vs_Q2_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((Q2_4H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(Q2_4H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}

Q2_4H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = Q2_4H_vs_Q2_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((Q2_4H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(Q2_4H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}

C_4H vs Q2_8H

C_4H_vs_Q2_8H <- results(dds_gene, contrast = c("grupo","C_4H","Q2_8H")) %>% 
  as_tibble(rownames = "ensembl_gene_id") %>%
  inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
C_4H_vs_Q2_8H %>%  
  write_tsv("C_4H_vs_Q2_8H.tsv")

C_4H_vs_Q2_8H %>% 
  mutate(sig = ifelse((padj < 0.001 & abs(log2FoldChange) >1.5) ,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= log2FoldChange, 
             y = -log10(padj), 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  labs(title = "C_4H_vs_Q2_8H") + 
  facet_wrap(~gene_biotype, scales = "free")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13742 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 195 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

C_4H_vs_Q2_8H %>% 
  mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
  mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>% 
  ggplot(aes(x= baseMean, 
             y = log2FoldChange, 
             color = sig, 
             size = sqrt(baseMean), 
             label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 1)+
  scale_color_d3() +
  theme_light() + 
  scale_x_log10()+
  labs(title = "C_4H_vs_Q2_8H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 10169 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 3761 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

### Funtional Annotation

C_4H_vs_Q2_8H_enrich <- C_4H_vs_Q2_8H %>%  
  filter(padj < 0.001 & abs(log2FoldChange) >1.5) %>% 
  dplyr::select(entrezgene_id) %>% 
    distinct() %>% 
    drop_na() %>%  
    pull(entrezgene_id)

C_4H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_4H_vs_Q2_8H_enrich,
                                   OrgDb         = org.Hs.eg.db,
                                    ont           = "ALL",
                                    pAdjustMethod = "BH",
                                    pvalueCutoff  = 0.01,
                                    qvalueCutoff  = 0.05,
                                    readable      = TRUE)

if ((C_4H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_4H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
  print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_4H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_4H_vs_Q2_8H_enrich,
                                        organism     = 'hsa',
                                        pvalueCutoff = 0.05)


if ((C_4H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0  )
{
  barplot(C_4H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
  print("No enriched Pathways")
}
## [1] "No enriched Pathways"

Post-analisis